# Remove every visible object from the current environment so leftovers from
# an earlier session cannot be picked up by this script, then run the garbage
# collector.
rm(list = ls())
gc()

library(data.table)
library(lfe)

# Read the 2016-2020 file and keep only rows that belong to the linked sample
# or that are flagged as a move.
data <- fread("linked-unlinked-2016-2020.csv.gz")
data <- data[Sample == "Linked" | Move == 1]

# Recode empty strings to NA in these columns so that is.na() checks later in
# the script treat them as missing.
data[countyfips == "", countyfips := NA]
data[ZipCode == "", ZipCode := NA]
data[Gender == "", Gender := NA]

# First pass: year-2 minus year-1 changes in marital status and household
# composition, plus explicit *_year1 copies of the block-group Dem/Rep
# measures. None of these expressions depends on a column created in the
# same call.
data[, `:=`(
  MarriageDiff = Married_year2 - Married_year1,
  hh.n.diff = hh.n.adj_year2 - hh.n.adj_year1,
  hh.d.diff = hh.d.adj_year2 - hh.d.adj_year1,
  hh.r.diff = hh.r.adj_year2 - hh.r.adj_year1,
  DemBlockGroup_year1 = DemBlockGroup,
  RepBlockGroup_year1 = RepBlockGroup
)]

# Second pass: block-group changes. These read the *_year1 columns created
# above, so they have to live in a separate `:=` call.
data[, `:=`(
  DemBlockGroupDiff = DemBlockGroup_year2 - DemBlockGroup_year1,
  RepBlockGroupDiff = RepBlockGroup_year2 - RepBlockGroup_year1
)]

# Split the sample by party and bin the continuous covariates ----

# Bin a numeric vector at its own deciles. Repeated quantile values are
# collapsed with unique() before being handed to cut(), and the smallest
# value is kept inside the first bin (include.lowest).
decile_bins <- function(x) {
  cutpoints <- quantile(x, probs = seq(0, 1, by = 0.1), na.rm = TRUE)
  cut(x, breaks = unique(as.numeric(cutpoints)), include.lowest = TRUE)
}

# Keep only the rows in which age, gender, race, year-1 marital status and
# zip code are all observed.
drop_incomplete <- function(dt) {
  dt[!(is.na(Age_year1) | is.na(Gender) | is.na(Race) |
         is.na(Married_year1) | is.na(ZipCode))]
}

# One table per party; everyone who is neither Democrat nor Republican goes
# into `oths`.
dems <- drop_incomplete(data[Party == "Democrat"])
reps <- drop_incomplete(data[Party == "Republican"])
oths <- drop_incomplete(data[!Party %in% c("Democrat", "Republican")])

# Deciles are computed separately inside each party table, so the cut points
# differ between dems, reps and oths.
dems[, `:=`(
  AgeDecile = decile_bins(Age_year1),
  DemBlockGroupDecile = decile_bins(DemBlockGroup_year1),
  RepBlockGroupDecile = decile_bins(RepBlockGroup_year1)
)]
reps[, `:=`(
  AgeDecile = decile_bins(Age_year1),
  DemBlockGroupDecile = decile_bins(DemBlockGroup_year1),
  RepBlockGroupDecile = decile_bins(RepBlockGroup_year1)
)]
oths[, `:=`(
  AgeDecile = decile_bins(Age_year1),
  DemBlockGroupDecile = decile_bins(DemBlockGroup_year1),
  RepBlockGroupDecile = decile_bins(RepBlockGroup_year1)
)]





# Matching-cell identifiers ----
# Each key is the "_"-joined value of the listed columns, in this order. The
# resulting strings are what the felm() calls further down use as their
# fixed-effect term. The Dem and Rep keys differ only in the block-group
# decile column and in the household column (hh.d vs hh.r).
dem_cell_cols <- c(
  "ZipCode", "AgeDecile", "Race", "Gender", "DemBlockGroupDecile", "State",
  "Married_year1", "hh.n.adj_year1", "hh.d.adj_year1"
)
rep_cell_cols <- c(
  "ZipCode", "AgeDecile", "Race", "Gender", "RepBlockGroupDecile", "State",
  "Married_year1", "hh.n.adj_year1", "hh.r.adj_year1"
)

dems[, GroupDemBlockGroup := do.call(paste, c(.SD, sep = "_")),
     .SDcols = dem_cell_cols]
reps[, GroupDemBlockGroup := do.call(paste, c(.SD, sep = "_")),
     .SDcols = dem_cell_cols]
oths[, GroupDemBlockGroup := do.call(paste, c(.SD, sep = "_")),
     .SDcols = dem_cell_cols]

dems[, GroupRepBlockGroup := do.call(paste, c(.SD, sep = "_")),
     .SDcols = rep_cell_cols]
reps[, GroupRepBlockGroup := do.call(paste, c(.SD, sep = "_")),
     .SDcols = rep_cell_cols]
oths[, GroupRepBlockGroup := do.call(paste, c(.SD, sep = "_")),
     .SDcols = rep_cell_cols]




# Block-group models ----
#
# Six regressions of Move on the change in a block-group measure
# (DemBlockGroupDiff or RepBlockGroupDiff), one per party table
# (dems / reps / oths). The felm() formula has four parts:
#   outcome ~ covariates | fixed effects | IV (0 = none) | cluster variable
# so every model absorbs the matching-cell key as a fixed effect and clusters
# on countyfips. Rows with a missing countyfips, an infinite treatment
# difference, or a missing year-1 block-group value are dropped before
# estimation. Only the summary() of each fit is kept.
#
# All six specifications share the same controls. MarriageDiff used to be
# absent from ModelDemBlockGroupDems because of a stray `++` in its formula
# (which R parses as a unary plus, so no error was raised); it is restored
# here so the Democrat model is comparable with the other five.

# --- Treatment: change in DemBlockGroup ---

ModelDemBlockGroupDems <- summary(
  felm(
    Move ~ DemBlockGroupDiff + hh.d.diff + hh.n.diff + MarriageDiff +
      WhiteBlockGroupDiff + AgeBlockGroupDiff + RegsBlockGroupDiff +
      HHIncomeBlockGroupDiff + CollegeBlockGroupDiff +
      HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff +
      DriveWorkBlockGroupDiff + EmplBlockGroupDiff +
      HouseValueBlockGroupDiff |
      GroupDemBlockGroup | 0 | countyfips,
    data = dems[!is.na(countyfips) & !is.infinite(DemBlockGroupDiff) &
                  !is.na(DemBlockGroup_year1)]
  ),
  robust = TRUE
)

ModelDemBlockGroupReps <- summary(
  felm(
    Move ~ DemBlockGroupDiff + hh.d.diff + hh.n.diff + MarriageDiff +
      WhiteBlockGroupDiff + AgeBlockGroupDiff + RegsBlockGroupDiff +
      HHIncomeBlockGroupDiff + CollegeBlockGroupDiff +
      HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff +
      DriveWorkBlockGroupDiff + EmplBlockGroupDiff +
      HouseValueBlockGroupDiff |
      GroupDemBlockGroup | 0 | countyfips,
    data = reps[!is.na(countyfips) & !is.infinite(DemBlockGroupDiff) &
                  !is.na(DemBlockGroup_year1)]
  ),
  robust = TRUE
)

ModelDemBlockGroupOths <- summary(
  felm(
    Move ~ DemBlockGroupDiff + hh.d.diff + hh.n.diff + MarriageDiff +
      WhiteBlockGroupDiff + AgeBlockGroupDiff + RegsBlockGroupDiff +
      HHIncomeBlockGroupDiff + CollegeBlockGroupDiff +
      HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff +
      DriveWorkBlockGroupDiff + EmplBlockGroupDiff +
      HouseValueBlockGroupDiff |
      GroupDemBlockGroup | 0 | countyfips,
    data = oths[!is.na(countyfips) & !is.infinite(DemBlockGroupDiff) &
                  !is.na(DemBlockGroup_year1)]
  ),
  robust = TRUE
)

# --- Treatment: change in RepBlockGroup ---
# Same layout as above, with hh.r.diff in place of hh.d.diff and the Rep
# matching-cell key as the fixed effect.

ModelRepBlockGroupDems <- summary(
  felm(
    Move ~ RepBlockGroupDiff + hh.r.diff + hh.n.diff + MarriageDiff +
      WhiteBlockGroupDiff + AgeBlockGroupDiff + RegsBlockGroupDiff +
      HHIncomeBlockGroupDiff + CollegeBlockGroupDiff +
      HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff +
      DriveWorkBlockGroupDiff + EmplBlockGroupDiff +
      HouseValueBlockGroupDiff |
      GroupRepBlockGroup | 0 | countyfips,
    data = dems[!is.na(countyfips) & !is.infinite(RepBlockGroupDiff) &
                  !is.na(RepBlockGroup_year1)]
  ),
  robust = TRUE
)

ModelRepBlockGroupReps <- summary(
  felm(
    Move ~ RepBlockGroupDiff + hh.r.diff + hh.n.diff + MarriageDiff +
      WhiteBlockGroupDiff + AgeBlockGroupDiff + RegsBlockGroupDiff +
      HHIncomeBlockGroupDiff + CollegeBlockGroupDiff +
      HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff +
      DriveWorkBlockGroupDiff + EmplBlockGroupDiff +
      HouseValueBlockGroupDiff |
      GroupRepBlockGroup | 0 | countyfips,
    data = reps[!is.na(countyfips) & !is.infinite(RepBlockGroupDiff) &
                  !is.na(RepBlockGroup_year1)]
  ),
  robust = TRUE
)

ModelRepBlockGroupOths <- summary(
  felm(
    Move ~ RepBlockGroupDiff + hh.r.diff + hh.n.diff + MarriageDiff +
      WhiteBlockGroupDiff + AgeBlockGroupDiff + RegsBlockGroupDiff +
      HHIncomeBlockGroupDiff + CollegeBlockGroupDiff +
      HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff +
      DriveWorkBlockGroupDiff + EmplBlockGroupDiff +
      HouseValueBlockGroupDiff |
      GroupRepBlockGroup | 0 | countyfips,
    data = oths[!is.na(countyfips) & !is.infinite(RepBlockGroupDiff) &
                  !is.na(RepBlockGroup_year1)]
  ),
  robust = TRUE
)


# Write the six model summaries to a single .Rdata file. The objects are
# named as strings via `list =`, in the same order as before.
save(
  list = c(
    "ModelDemBlockGroupDems", "ModelRepBlockGroupReps",
    "ModelDemBlockGroupReps", "ModelDemBlockGroupOths",
    "ModelRepBlockGroupDems", "ModelRepBlockGroupOths"
  ),
  file = "results/modeling-moving-2016-2020.Rdata"
)
